# 5200 Final Project Pie Chart

# Salary Pie Chart

library(tidyverse)
# 2019 salary table, read from a local CSV export.
# Source: https://legacy.baseballprospectus.com/compensation/?team=WAS
salary_2019 <- readr::read_csv(file = "Salary_2019.csv")

# 2023 salary table, read from a local CSV export.
# Source: https://www.spotrac.com/mlb/washington-nationals/payroll/2023/
salary_2023 <- readr::read_csv(file = "Salary_2023.csv")

# Keep only the position and salary columns of each table, then drop every
# row that has a missing value in either of them.
salary_2019 <- salary_2019 %>%
  dplyr::select("Pos", "Salary") %>%
  na.omit()
salary_2023 <- salary_2023 %>%
  dplyr::select("POS.", "BASE SALARY") %>%
  na.omit()

# Check which position codes occur in each dataset. The output of a previous
# run is recorded as `#>` comments so that it is not parsed as code.
# Note the differences: 2023 has "RP/CL" and "OF", which 2019 lacks, while
# 2019 has "DNP" and "PH", which 2023 lacks.
unique(salary_2019$Pos)
#>  [1] "SP"  "3B"  "1B"  "2B"  "RF"  "C"   "RP"  "SS"  "CF"  "LF"  "DNP" "PH"
unique(salary_2023$`POS.`)
#>  [1] "SP"    "RP/CL" "RF"    "1B"    "RP"    "C"     "2B"    "SS"    "LF"
#> [10] "3B"    "OF"    "CF"
# 2023: fold the combined "RP/CL" code into "RP", strip the thousands
# separators and the dollar sign from the salary text, convert it to numeric,
# and keep only the two cleaned columns.
salary_2023 <- salary_2023 %>%
  mutate(
    Pos = if_else(`POS.` == "RP/CL", "RP", `POS.`),
    Salary_2023 = as.numeric(
      gsub("\\$", "", gsub(",", "", `BASE SALARY`))
    )
  ) %>%
  dplyr::select(Pos, Salary_2023)

# 2019: same text-to-number conversion; the original columns are kept and the
# numeric value is added as Salary_2019.
salary_2019 <- salary_2019 %>%
  mutate(
    Salary_2019 = as.numeric(
      gsub("\\$", "", gsub(",", "", Salary))
    )
  )

# 2023 salary summed per position, plus the overall 2023 total (repeated on
# every row) and each position's share of that total.
# The column name "percetage_2023" is spelled this way on purpose: later code
# refers to it by this exact name.
salary_2023_1 <- salary_2023 %>%
  group_by(Pos) %>%
  summarize(salary_sum_2023 = sum(as.numeric(Salary_2023))) %>%
  mutate(
    total_2023 = sum(salary_sum_2023),
    percetage_2023 = salary_sum_2023 / total_2023
  )

# 2019 salary summed per position, plus the overall 2019 total (repeated on
# every row) and each position's share of that total.
# The column name "percetage_2019" is spelled this way on purpose: later code
# refers to it by this exact name.
salary_2019_1 <- salary_2019 %>%
  group_by(Pos) %>%
  summarize(salary_sum_2019 = sum(as.numeric(Salary_2019))) %>%
  mutate(
    total_2019 = sum(salary_sum_2019),
    percetage_2019 = salary_sum_2019 / total_2019
  )

# Join the per-position tables of both seasons on the position code.
# A full join keeps every position that appears in either season. A left join
# starting from 2019 would silently drop 2023-only codes such as "OF", so
# their salary would be missing from the 2023 chart.
# A position absent from one season contributed nothing to that season's
# payroll, so its sum and share are set to 0 for that season.
salary_df <- salary_2019_1 %>%
  full_join(salary_2023_1, by = "Pos") %>%
  replace_na(list(
    salary_sum_2019 = 0,
    percetage_2019 = 0,
    salary_sum_2023 = 0,
    percetage_2023 = 0
  ))

# Total salary per season. Each total_* column holds the same value on every
# row that came from its own season and NA on rows that exist only in the
# other season, so the NA-ignoring mean recovers that single value.
salary_total_2019 <- mean(salary_df$total_2019, na.rm = TRUE)
salary_total_2023 <- mean(salary_df$total_2023, na.rm = TRUE)
library(plotly)
# Interactive 2019 pie chart: one slice per position code, sized by that
# position's summed 2019 salary. Width and height are both
# sqrt(total 2019 salary) / 30, so the square canvas area grows in proportion
# to the season's total.
pie_chart_2019 <- salary_df %>%
  plot_ly(
    labels = ~Pos,                # slice labels: position codes
    values = ~salary_sum_2019,    # slice sizes: salary summed per position
    type = 'pie',
    textinfo = 'label+percent',   # show the label and its percentage
    textposition = 'inside',      # draw that text inside the slices
    marker = list(line = list(color = '#FFFFFF', width = 2)),  # white borders
    width = sqrt(salary_total_2019) / 30,
    height = sqrt(salary_total_2019) / 30
  ) %>%
  layout(title = 'Salary By Positions in 2019')

# Interactive 2023 pie chart: one slice per position code, sized by that
# position's summed 2023 salary. Width and height are both
# sqrt(total 2023 salary) / 30, so the square canvas area grows in proportion
# to the season's total.
pie_chart_2023 <- salary_df %>%
  plot_ly(
    labels = ~Pos,                # slice labels: position codes
    values = ~salary_sum_2023,    # slice sizes: salary summed per position
    type = 'pie',
    textinfo = 'label+percent',   # show the label and its percentage
    textposition = 'inside',      # draw that text inside the slices
    marker = list(line = list(color = '#FFFFFF', width = 2)),  # white borders
    width = sqrt(salary_total_2023) / 30,
    height = sqrt(salary_total_2023) / 30
  ) %>%
  layout(title = 'Salary By Positions in 2023')
# Display both charts by evaluating them at top level, 2023 first and then
# 2019.
pie_chart_2023
pie_chart_2019